home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
TeX 1995 July
/
TeX CD-ROM July 1995 (Disc 1)(Walnut Creek)(1995).ISO
/
biblio
/
bibtex
/
utils
/
refer-tools
/
ref2bib.awk
< prev
next >
Wrap
Text File
|
1994-11-13
|
13KB
|
507 lines
#
# awk script to convert refer (or bib) format databases
# to BibTeX format.
#
# written by Peter King, Heriot-Watt University
# use freely, but don't claim that you wrote it
#
# Generates keys using authors names and year (see %A entry )
#
# You may wish to alter treatment of key fields that are ignored
# such as %U %W %Y %K etc.
#
# regular expressions should be sorted according to frequency
# so that minimal tests are made
# From tests in a local data base the order given appears quite good
# 2883 %A
# 1813 blank lines
# 1774 %T
# 1764 %D
# 1505 %P
# 1347 %J
# 1331 %V
# 1201 %N
# 773 .. continuation lines
# 501 %C
# 424 %I
# 192 %B
# 187 %E
# 92 %S
# 89 %R
# 33 %X
# 30 %K
# 16 %O
# 12 %any other % lines
#
BEGIN {
    # Settings used when building citation keys.
    lkey = 3        # letters taken from each author's last word
    maxauthor = 3   # only this many leading authors contribute to a key
    rx = 1          # reference number quoted in diagnostics

    # addkey[1] is a blank and addkey[2..27] are the letters a..z; the
    # blank-line rule appends one of these to a key that is already in use.
    suffixes = " abcdefghijklmnopqrstuvwxyz"
    i = 1
    while (i <= 27) {
        addkey[i] = substr(suffixes, i, 1)
        i++
    }
}
# Report any line that still contains a backslash followed by "*", "u" or
# "0", "\d" not followed by "{", or "\s" not followed by "s" (presumably
# troff escapes that were not translated -- confirm against the input).
# There is no "next" here, so the line is still handled by the field rules.
$0 ~ /\\[*u0]/ || $0 ~ /\\d[^{]/ || $0 ~ /\\s[^s]/ {
    print "Non translated \\ symbol : Reference " rx > "ref2bib.errs"
    print $0 > "ref2bib.errs"
    err = 1
}
/^%A/ {
    # %A: one author per line.  The first author of a reference starts a
    # fresh key; each of the first maxauthor authors contributes up to
    # lkey letters taken from the last word on the line.
    if (A == 0)
        keys = ""
    A++
    lastx = "A"
    authors[A] = substr($0, 4)

    if (A <= maxauthor) {
        surname = $NF
        taken = 0
        for (pos = 1; taken < lkey && pos <= length(surname); pos++) {
            ch = substr(surname, pos, 1)
            if (ch ~ /[a-zA-Z]/) {
                keys = keys ch
                taken++
            }
            else if (ch == "\\")
                pos++       # also skip the character after a backslash
        }
    }
    next
}
/^$/ {
    # A blank line ends the current reference: validate it, classify it,
    # build its citation key and print the BibTeX entry on standard output.
    # Diagnostics are written to "ref2bib.errs".  A blank line that directly
    # follows another blank line (or is the first input line) is ignored.
    # NOTE: a reference is emitted only when a blank line is read, so the
    # input has to end with a blank line for the last reference to appear.
    if (NR != pr + 1) {
        refs++

        # A reference without any %A line must not inherit the key built
        # for the previous reference; clearing it lets the "ANON" fallback
        # below take effect.
        if (!A) keys = ""

        # Statistics reported by the END rule.
        acnt[A]++
        if (A > MaxA) MaxA = A
        totA += A
        if (T) totT++
        if (O) totO++
        if (X) totX++

        # Mandatory fields.
        if (T == 0) print "No title : Reference " refs " " keys > "ref2bib.errs"
        if (A == 0) print "No author : Reference " refs " " keys > "ref2bib.errs"
        if (D == 0) print "No date : Reference " refs " " keys > "ref2bib.errs"
        if ((!T) || (!A) || (!D)) err = 1

        # Classify the reference by the fields that are present.
        if (J) {
            # journal article or conference paper
            if (B || E || R) print "Journal & book?: Reference " refs " " keys > "ref2bib.errs"
            if (C || I) {
                conf++
                type = "INPROCEEDINGS"
            }
            else {
                jour++
                type = "ARTICLE"
            }
            if (!P) print "No page nos.? : Reference " refs " " keys > "ref2bib.errs"
            if (B || E || R || (!P)) err = 1
            if (err) {
                print "Journal reference in error" > "ref2bib.errs"
            }
        }
        else if (B) {
            # article in a book
            type = "INCOLLECTION"
            bookart++
            if (N || R || (!E) || (!I) || (!C) || (!P) || (V && (!S))) err = 1
            if (!E) print "No editor? Reference " refs " " keys > "ref2bib.errs"
            if (!I) print "No publisher? Reference " refs " " keys > "ref2bib.errs"
            if (!C) print "No city? Reference " refs " " keys > "ref2bib.errs"
            if (!P) print "No page nos.? Reference " refs " " keys > "ref2bib.errs"
            if (V && (!S)) print "Volume but no Series Reference " refs " " keys > "ref2bib.errs"
            if (N) print "Issue no.? Reference " refs " " keys > "ref2bib.errs"
            if (R) print "Report? Reference " refs " " keys > "ref2bib.errs"
            if (err) {
                print "Book reference in error" > "ref2bib.errs"
            }
        }
        else if (R) {
            # report
            type = "TECHREPORT"
            reps++
            if (E || N) err = 1
            if (N) print "Issue no.? Reference " refs " " keys > "ref2bib.errs"
            if (E) print "Editor? Reference " refs " " keys > "ref2bib.errs"
            if (err) {
                print "Report reference in error" > "ref2bib.errs"
            }
        }
        else if (I) {
            # whole book
            wholebook++
            type = "BOOK"
            if (N || R || E || (!C) || (V && (!S))) err = 1
            if (!C) print "No city? Reference " refs " " keys > "ref2bib.errs"
            if (N) print "Issue no.? Reference " refs " " keys > "ref2bib.errs"
            if (E) print "Editor? Reference " refs " " keys > "ref2bib.errs"
            if (V && (!S)) print "Volume but no Series Reference " refs " " keys > "ref2bib.errs"
            if (err) {
                print "Book reference in error" > "ref2bib.errs"
            }
        }
        else {
            unclass++
            type = "MISC"
            err = 1
            print "Unclassified reference in error" > "ref2bib.errs"
        }

        # Split the date into month (df[1]) and year (df[2]); a date with a
        # single word is taken to be the year alone.
        ndate = split(date, df)
        if (ndate > 2) print " Funny date " date > "ref2bib.errs"
        if (ndate == 1) { df[2] = df[1]; df[1] = "" }

        # Build the key: author letters followed by the last two digits of
        # the year.  The first use of a key is left bare; later uses get
        # "a", "b", ... appended.  addkey[1] is a blank, so the suffix for
        # the n-th repeat is addkey[n + 1]; using the blank would produce a
        # key that still collides with the first one.
        if (keys == "") keys = "ANON"
        keys = keys substr(df[2], 3, 2)
        if (keyused[keys] >= 1) {
            key_suffix = keyused[keys]++
            keys = keys addkey[key_suffix + 1]
        }
        else keyused[keys] = 1

        # Dump the whole reference, in refer form, when anything was wrong.
        if (err) {
            print "Key: " keys > "ref2bib.errs"
            for (i = 1; i <= A; i++)
                print "%A " authors[i] > "ref2bib.errs"
            if (T) print "%T " title > "ref2bib.errs"
            if (J) print "%J " journal > "ref2bib.errs"
            if (B) print "%B " book > "ref2bib.errs"
            if (V) print "%V " volume > "ref2bib.errs"
            if (N) print "%N " number > "ref2bib.errs"
            if (I) print "%I " publisher > "ref2bib.errs"
            if (C) print "%C " city > "ref2bib.errs"
            for (i = 1; i <= E; i++)
                print "%E " editor[i] > "ref2bib.errs"
            if (S) print "%S " series > "ref2bib.errs"
            if (P) print "%P " pages > "ref2bib.errs"
            if (R) print "%R " report > "ref2bib.errs"
            if (D) print "%D " date > "ref2bib.errs"
            if (O) print "%O " other > "ref2bib.errs"
            print "" > "ref2bib.errs"
        }

        # Re-flow the long text fields: words are joined by single blanks,
        # and a newline plus two tabs is inserted instead once the current
        # line would reach 55 characters.
        if (T) {
            twc = split(title, z)
            title = z[1]; lt = length(z[1])
            for (i = 2; i <= twc; i++) {
                if (lt + length(z[i]) >= 55) { sc = "\n\t\t"; lt = 0 }
                else sc = " "
                title = title sc z[i]
                lt += length(z[i]) + 1
            }
        }
        if (O) {
            twc = split(other, z)
            other = z[1]; lt = length(z[1])
            for (i = 2; i <= twc; i++) {
                if (lt + length(z[i]) >= 55) { sc = "\n\t\t"; lt = 0 }
                else sc = " "
                other = other sc z[i]
                lt += length(z[i]) + 1
            }
        }
        if (X) {
            twc = split(abstr, z)
            abstr = z[1]; lt = length(z[1])
            for (i = 2; i <= twc; i++) {
                if (lt + length(z[i]) >= 55) { sc = "\n\t\t"; lt = 0 }
                else sc = " "
                abstr = abstr sc z[i]
                lt += length(z[i]) + 1
            }
        }

        # Emit the BibTeX entry.
        printf "@%s{%s", type, keys
        if (A) {
            printf ",\n\tauthor={%s", authors[1]
            for (i = 2; i <= A; i++) printf " and %s", authors[i]
            printf "}"
        }
        if (T) printf ",\n\ttitle={%s}", title
        if (B) printf ",\n\tbooktitle={%s}", book
        if (E) {
            printf ",\n\teditor={%s", editor[1]
            for (i = 2; i <= E; i++) printf " and\n\t\t%s", editor[i]
            printf "}"
        }
        if (I) printf ",\n\tpublisher={%s}", publisher
        if (C) printf ",\n\taddress={%s}", city
        if (J) {
            # Substitute the journal abbreviations from the standard
            # styles.  An abbreviation is written without braces so that
            # BibTeX treats it as a string macro; anything else keeps the
            # braces added here.
            journal = "{ " journal "}"
            # {acmcs} {"ACM Computing Surveys"}
            if ( journal ~ /Comp.* Sur/ ) journal = "acmcs"
            # {acta} {"Acta Informatica"}
            if ( journal ~ /Acta Inf/ ) journal = "acta"
            # {cacm} {"Communications of the ACM"}
            if ( journal ~ /Com.* ACM/ ) journal = "cacm"
            if ( journal ~ /CACM/ ) journal = "cacm"
            # {ibmjrd} {"IBM Journal of Research and Development"}
            if ( journal ~ /IBM J.*R.*D/ ) journal = "ibmjrd"
            # {ibmsj} {"IBM Systems Journal"}
            if ( journal ~ /IBM Sy.*J/ ) journal = "ibmsj"
            # {ieeese} {"IEEE Transactions on Software Engineering"}
            if ( journal ~ /IEEE Tran.*Soft.*Eng/ ) journal = "ieeese"
            # {ieeetc} {"IEEE Transactions on Computers"}
            if ( journal ~ /IEEE Tran.*Computers/ ) journal = "ieeetc"
            # {ieeetcad}
            if ( journal ~ /IEEE Tran.*Comp.*Desig/ ) journal = "ieeetcad"
            # {ipl} {"Information Processing Letters"}
            if ( journal ~ /Inf.*Proc.*Lett/ ) journal = "ipl"
            # {jacm} {"Journal of the ACM"}
            if ( journal ~ /Jou.* ACM/ ) journal = "jacm"
            if ( journal ~ /JACM/ ) journal = "jacm"
            # {jcss} {"Journal of Computer and System Sciences"}
            if ( journal ~ /J.*Comp.*Sys.*Sc/ ) journal = "jcss"
            # {scp} {"Science of Computer Programming"}
            if ( journal ~ /Sc.*Comp.*Prog/ ) journal = "scp"
            # {sicomp} {"SIAM Journal on Computing"}
            if ( journal ~ /SIAM .*Comp/ ) journal = "sicomp"
            # {tocs} {"ACM Transactions on Computer Systems"}
            if ( journal ~ /ACM Tran.*Comp.*Sys/ ) journal = "tocs"
            # {tods} {"ACM Transactions on Database Systems"}
            if ( journal ~ /ACM Tran.*Data.*Sys/ ) journal = "tods"
            # {tog} {"ACM Transactions on Graphics"}
            if ( journal ~ /ACM Tran.*Grap/ ) journal = "tog"
            # {toms} {"ACM Transactions on Mathematical Software"}
            if ( journal ~ /ACM Tran.*Math.*Soft/ ) journal = "toms"
            # {toois} {"ACM Transactions on Office Information Systems"}
            if ( journal ~ /ACM Tran.*Off.*Inf.*Sys/ ) journal = "toois"
            # {toplas} {"ACM Transactions on Programming Languages and Systems"}
            if ( journal ~ /ACM Tran.*Prog.*Lan.*Sys/ ) journal = "toplas"
            # {tcs} {"Theoretical Computer Science"}
            if ( journal ~ /Th.*Comp.*Sci/ ) journal = "tcs"
            printf ",\n\tjournal=%s", journal
        }
        if (V) printf ",\n\tvolume={%s}", volume
        if (N) printf ",\n\tnumber={%s}", number
        if (P) printf ",\n\tpages={%s}", pages
        if (O) printf ",\n\tnote={%s}", other
        if (R) printf ",\n\tnumber={%s}", report
        if (S) printf ",\n\tseries={%s}", series
        if (df[1] != "")
            printf ",\n\tmonth={%s}", df[1]
        if (D) printf ",\n\tyear={%s}", df[2]
        if (X) printf ",\n\tannote={%s}", abstr
        if (L) printf ",\n\tkey={%s}", label
        printf "\n}\n\n"

        # Reset all per-reference state for the next reference.
        A=0;B=0;C=0;D=0;E=0;F=0;G=0;H=0;I=0;J=0;
        K=0;L=0;M=0;N=0;O=0;P=0;Q=0;R=0;S=0;T=0;
        U=0;V=0;W=0;X=0;Y=0;Z=0;
        type = ""
        book = ""
        title = ""
        volume = ""
        city = ""
        date = ""
        publisher = ""
        journal = ""
        number = ""
        other = ""
        pages = ""      # the field variable is "pages", not "page"
        report = ""
        series = ""
        label = ""
        abstr = ""
        toterr += err
        rx++
    }
    err = 0
    pr = NR
    next
}
/^%T/ {
    # %T: title.  A repeated %T is reported together with the title it
    # replaces; the newest title wins.
    lastx = "T"
    if (++T > 1) {
        print "Two titles: Reference " rx > "ref2bib.errs"
        print title > "ref2bib.errs"
        err = 1
    }
    title = substr($0, 4)
    next
}
/^%D/ {
    # %D: publication date; the last word on the line is taken as the year.
    # A repeated %D is reported together with the date it replaces.
    # The year is sanity-checked against [1900, maxyear).  The upper bound
    # used to be hard-coded to 2000, which flagged every reference from
    # 2000 onwards; it now defaults to 2100 and can be overridden by
    # assigning maxyear on the awk command line.
    D++
    lastx = "D"
    if (D > 1) {
        err = 1
        print "Two dates: Reference " rx > "ref2bib.errs"
        print date > "ref2bib.errs"
    }
    if (maxyear == "") maxyear = 2100
    if (($NF < 1900) || ($NF >= maxyear)) {
        err = 1
        print "Date error? : Reference " rx > "ref2bib.errs"
    }
    date = substr($0, 4)
    next
}
/^%P/ {
    # %P: page numbers.  A repeated %P is reported together with the value
    # it replaces.
    lastx = "P"
    if (++P > 1) {
        print "Two page nos? : Reference " rx > "ref2bib.errs"
        print pages > "ref2bib.errs"
        err = 1
    }
    pages = substr($0, 4)
    next
}
/^%J/ {
    # %J: journal name.  A repeated %J is reported together with the name
    # it replaces.
    lastx = "J"
    if (++J > 1) {
        print "Two journals: Reference " rx > "ref2bib.errs"
        print journal > "ref2bib.errs"
        err = 1
    }
    journal = substr($0, 4)
    next
}
/^%V/ {
    # %V: volume.  A repeated %V is reported together with the value it
    # replaces.
    lastx = "V"
    if (++V > 1) {
        print "Two volumes: Reference " rx > "ref2bib.errs"
        print volume > "ref2bib.errs"
        err = 1
    }
    volume = substr($0, 4)
    next
}
/^%N/ {
    # %N: issue number.  A repeated %N is reported together with the value
    # it replaces.
    lastx = "N"
    if (++N > 1) {
        print "Two issue numbers: Reference " rx > "ref2bib.errs"
        print number > "ref2bib.errs"
        err = 1
    }
    number = substr($0, 4)
    next
}
/^[^%]/ {
    # Continuation line (non-empty, not starting with "%"): append it,
    # separated by one blank, to whichever field was read last.  Lines
    # following an ignored or unexpected field are dropped.
    if      (lastx == "A") authors[A] = authors[A] " " $0
    else if (lastx == "B") book       = book " " $0
    else if (lastx == "C") city       = city " " $0
    else if (lastx == "D") date       = date " " $0
    else if (lastx == "E") editor[E]  = editor[E] " " $0
    else if (lastx == "I") publisher  = publisher " " $0
    else if (lastx == "J") journal    = journal " " $0
    else if (lastx == "L") label      = label " " $0
    else if (lastx == "N") number     = number " " $0
    else if (lastx == "O") other      = other " " $0
    else if (lastx == "P") pages      = pages " " $0
    else if (lastx == "R") report     = report " " $0
    else if (lastx == "S") series     = series " " $0
    else if (lastx == "T") title      = title " " $0
    else if (lastx == "V") volume     = volume " " $0
    else if (lastx == "X") abstr      = abstr " " $0
    next
}
/^%C/ {
    # %C: city (printed as the BibTeX address).  A repeated %C is reported
    # together with the value it replaces, the file name and the line range
    # of the reference.
    lastx = "C"
    if (++C > 1) {
        print "Two cities: Reference " rx > "ref2bib.errs"
        print city > "ref2bib.errs"
        print " 2 cities " FILENAME, pr+1, NR > "ref2bib.errs"
        err = 1
    }
    city = substr($0, 4)
    next
}
/^%I/ {
    # %I: publisher.  A repeated %I is reported together with the value it
    # replaces.
    lastx = "I"
    if (++I > 1) {
        print "Two publishers: Reference " rx > "ref2bib.errs"
        print publisher > "ref2bib.errs"
        err = 1
    }
    publisher = substr($0, 4)
    next
}
/^%B/ {
    # %B: title of the book containing the article.  A repeated %B is
    # reported together with the value it replaces.
    lastx = "B"
    if (++B > 1) {
        print "Two books: Reference " rx > "ref2bib.errs"
        print book > "ref2bib.errs"
        err = 1
    }
    book = substr($0, 4)
    next
}
/^%E/ {
    # %E: editor.  Each %E line adds one editor, which really suits the
    # 'bib' format; refer allows only a single %E field, so that field
    # ought to be split into names somehow (not done here).
    lastx = "E"
    E++
    editor[E] = substr($0, 4)
    next
}
/^%[^ABCDEIJKLNOPRSTVX]/ {
    # Any "%" field whose letter is not in the list above is unexpected:
    # report the line and mark the reference as erroneous.
    print "Unexpected flag: Reference " rx > "ref2bib.errs"
    print $0 > "ref2bib.errs"
    err = 1
    F++
    lastx = "F"
    next
}
/^%O/ {
    # %O: other information (printed as the BibTeX note).  A repeated %O
    # is reported together with the value it replaces.
    lastx = "O"
    if (++O > 1) {
        print "Two others: Reference " rx > "ref2bib.errs"
        print other > "ref2bib.errs"
        err = 1
    }
    other = substr($0, 4)
    next
}
/^%S/ {
    # %S: series.  A repeated %S is reported together with the value it
    # replaces.
    lastx = "S"
    if (++S > 1) {
        print "Two series: Reference " rx > "ref2bib.errs"
        print series > "ref2bib.errs"
        err = 1
    }
    series = substr($0, 4)
    next
}
/^%R/ {
    # %R: report number.  A repeated %R is reported together with the
    # value it replaces.
    lastx = "R"
    if (++R > 1) {
        print "Two reports: Reference " rx > "ref2bib.errs"
        print report > "ref2bib.errs"
        err = 1
    }
    report = substr($0, 4)
    next
}
/^%X/ {
    # %X: abstract or commentary (printed as the BibTeX annote).  A
    # repeated %X is reported; the newest text replaces the old one.
    lastx = "X"
    if (++X > 1) {
        print "Two abstracts: Reference " rx > "ref2bib.errs"
        err = 1
    }
    abstr = substr($0, 4)
    next
}
/^%K/ {
    # %K: keywords.  They are not carried over to BibTeX; remembering the
    # field letter makes sure their continuation lines are dropped too.
    lastx = "K"
    next
}
/^%L/ {
    # %L: label, printed as the BibTeX "key" field by the blank-line rule.
    # The rest of the script already handles labels (continuation lines,
    # output, exemption from the "unexpected flag" rule), but no rule
    # recorded them: %L lines were silently dropped and their continuation
    # lines were appended to the previous field.
    L++
    lastx = "L"
    if (L > 1) {
        err = 1
        print "Two labels: Reference " rx > "ref2bib.errs"
        print label > "ref2bib.errs"
    }
    label = substr($0, 4)
    next
}
END {
    # Write summary statistics for the whole run to "ref2bib.errs".
    nrefs = refs + 0            # print 0, not an empty string, for empty input
    print nrefs " references" > "ref2bib.errs"
    if (toterr) print toterr " erroneous" > "ref2bib.errs"
    if (conf) print conf " conference papers" > "ref2bib.errs"
    if (jour) print jour " journal articles" > "ref2bib.errs"
    if (wholebook) print wholebook " books" > "ref2bib.errs"
    # The blank-line rule counts articles in books in "bookart"; the
    # variable totB tested here before is never assigned.
    if (bookart) print bookart " book articles" > "ref2bib.errs"
    if (reps) print reps " reports" > "ref2bib.errs"
    if (unclass) print unclass " Unclassified" > "ref2bib.errs"
    if (totO) print totO " have additional information." > "ref2bib.errs"
    if (totK) print totK " have additional keywords." > "ref2bib.errs"
    if (totX) print totX " have abstracts/commentaries." > "ref2bib.errs"
    print totA " authors" > "ref2bib.errs"
    # Distribution of authors per reference, and the weighted sum for the
    # average.
    for (i = 0; i <= MaxA; i++) if (acnt[i]) {
        print i, " authors ", acnt[i] > "ref2bib.errs"
        av += i * acnt[i]
    }
    # With no references at all the average is undefined; dividing by zero
    # here would abort awk before the remaining lines are written.
    if (nrefs > 0) print "Average ", av / nrefs > "ref2bib.errs"
    print totT " titles" > "ref2bib.errs"
    print "Key frequencies" > "ref2bib.errs"
    for (k in keyused) print k, keyused[k] > "ref2bib.errs"
}